In [69]:
import pandas as pd
In [3]:
import ghgmosh as gm
In [174]:
reload(gm)  # Python 2 builtin; under Python 3 this would be importlib.reload(gm)
Out[174]:
In [16]:
usa = pd.io.excel.read_excel("../CDPdata/s12_american.xlsx")
world = pd.io.excel.read_excel("../CDPdata/s12_world.xlsx")
In [5]:
world.columns
Out[5]:
In [6]:
usa.columns
Out[6]:
In [17]:
world.rename(columns={"International Security Identification Number": "ISIN",
                      'Net Income (Loss) - Consolidated': "income"}, inplace=True)
world = world[world["ISIN"].notnull()]
world = world[world["Revenue - Total"].notnull()]
len(world["ISIN"].value_counts().index)  # 900
# Chinese companies did not return data
Out[17]:
In [18]:
usa.rename(columns={"Ticker Symbol": "Ticker",
                    'Net Income (Loss)': "income"}, inplace=True)
len(usa['Ticker'].value_counts().index)  # 347
Out[18]:
In [19]:
orginfos = pd.read_pickle("../CDPdata/orginfos.pkl")
In [20]:
orginfos["Ticker"] = orginfos["Ticker"].apply(lambda(x): str(x).split(" ")[0])
orginfos_byticker = orginfos.reset_index().set_index("Ticker")
orginfos_byticker = orginfos_byticker[orginfos_byticker["Country"]== "USA"]
orginfos_byisin = orginfos.reset_index().set_index("ISIN")
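The split on " " above keeps only the bare ticker symbol. As an illustration only, with a made-up raw value (the exact format of the raw Ticker strings is an assumption here, not taken from the data):

clean_ticker = lambda x: str(x).split(" ")[0]
clean_ticker("AAPL US")   # -> "AAPL"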
In [21]:
usa.set_index("Ticker", inplace=True)
world.set_index("ISIN", inplace=True)
In [22]:
usainfos = usa.join(orginfos_byticker[["Organisation"]])
worldinfos = world.join(orginfos_byisin[["Organisation"]])
In [23]:
len(usainfos["Organisation"].value_counts().index) # 347
len(worldinfos["Organisation"].value_counts().index) # 900 perfect
Out[23]:
In [24]:
usainfos = usainfos.reset_index().set_index("Organisation")
worldinfos = worldinfos.reset_index().set_index("Organisation")
In [31]:
s12_financials = pd.concat([usainfos, worldinfos])
s12_financials.rename(columns={"Data Year - Fiscal": "year", "Revenue - Total": "Revenues",
                               "Cost of Goods Sold": "COGS", 'Stockholders Equity - Parent': "Equity",
                               'Assets - Total': "Assets", "income": "Income",
                               'Property, Plant and Equipment - Total (Net)': "PPE"}, inplace=True)
s12_financials = s12_financials.reset_index().set_index(["Organisation","year"])
s12_financials.to_pickle("../CDPdata/s12_financials.pkl")
In [27]:
s12_financials["ISO Currency Code"].value_counts() # all USD thank goodness
Out[27]:
In [70]:
# combine with s12_completed to compute intensities
s12_financials = pd.read_pickle("../CDPdata/s12_financials.pkl")
scopes12_c = pd.read_pickle("../CDPdata/s12_completed.pkl")
In [71]:
s12_ghgfins = scopes12_c[["Country", "GICS Industry",
                          "GICS Sector", "scope1", "scope2"]].join(s12_financials[fm.FINCOLS], how="inner")
In [72]:
s12_ghgfins["1and2 total"] = s12_ghgfins["scope1"]+ s12_ghgfins["scope2"]
s12_ghgfins["GICS Sector"].replace("Banks", "Financials", inplace=True)
In [36]:
import finmosh as fm
In [73]:
reload(fm)
Out[73]:
In [74]:
s = s12_ghgfins.reset_index()
s12_ghgfins = fm.adjust_inflation_all(s)
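fm.adjust_inflation_all is also part of finmosh and not shown here. Since the comment in In [79] says the financials end up in millions of Jan 2015 USD, a minimal sketch of such an adjustment might look like the following; the function name is hypothetical and the deflator values are placeholders, not real CPI figures:

# Sketch only, NOT finmosh's implementation: scale each financial column
# into Jan 2015 dollars using a per-year deflator.
DEFLATORS = {2009: 1.11, 2010: 1.09, 2011: 1.06, 2012: 1.04, 2013: 1.02}  # placeholder factors

def adjust_inflation_sketch(df, cols):
    factors = df["year"].map(DEFLATORS)
    out = df.copy()
    for col in cols:
        out[col] = out[col] * factors
    return out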
In [75]:
for col in fm.FINCOLS:
    s12_ghgfins[col] = s12_ghgfins[col].fillna(0)
In [76]:
s12_ghgfins["1and2 intensity"] = s12_ghgfins["1and2 total"]/s12_ghgfins["Revenues"]
s12_ghgfins = s12_ghgfins[s12_ghgfins["1and2 intensity"].isnull() == False]
s12_ghgfins.set_index(["Organisation", "year"], inplace=True)
s12_ghgfins.sort_index(inplace=True)
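A quick unit check on the intensity just computed, with illustrative numbers that are not taken from the data: scope 1+2 totals are in tCO2e and revenues in millions of USD, so

50000 / 2000.0   # 50,000 tCO2e on $2,000M revenue -> 25.0 tCO2e/$million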
In [78]:
len(s12_ghgfins.index.levels[0].value_counts().index) #1243 instead of 1247 requested from COMPUSTAT
len(s12_ghgfins) # 4673
Out[78]:
In [79]:
# revenues and COGS are in millions of USD (Jan 2015 dollars)
# intensity is in tCO2e per $million of revenue
s12_ghgfins.to_pickle("../CDPdata/s12_ghgfins.pkl")
In [80]:
s12_ghgfins.head()
Out[80]:
In [83]:
s = s12_ghgfins.reset_index()
s[s.duplicated(["Organisation", "year"])]
Out[83]:
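In [83] checks for duplicate (Organisation, year) rows. If the check ever turns up rows, one option (not used in this notebook) is to keep only the first occurrence of each pair:

# Sketch only: drop repeated (Organisation, year) rows, keeping the first.
s = s.drop_duplicates(["Organisation", "year"])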
In [11]:
import numpy as np
from collections import OrderedDict
from bokeh.charts import Scatter
from bokeh.charts import Histogram
from bokeh.plotting import output_notebook, show
output_notebook()
In [12]:
import datavis as dv
In [70]:
ghgfins_s = s12_ghgfins.reset_index().set_index(["GICS Sector","year"])
ghgfins_yr = s12_ghgfins.reset_index().set_index("year")
In [71]:
ghgfins_s.index.levels[0]
Out[71]:
In [140]:
sector = "Consumer Staples"
int_s = OrderedDict()
for yr in range(2009, 2014):
    vs = np.array(ghgfins_s.loc[sector, yr]["1and2 intensity"].tolist())
    int_s[str(yr)] = vs[vs < 500]
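The cell above fills int_s for one sector at a time by editing the sector variable. To cover every sector, as the comment in In [141] suggests, a loop over the sector level of the index would look roughly like this (a sketch, not code that was run; it assumes every sector has data in each year 2009-2013):

# Sketch: build the same per-year dict of intensity arrays for every sector.
int_by_sector = {}
for sec in ghgfins_s.index.levels[0]:
    d = OrderedDict()
    for yr in range(2009, 2014):
        vs = np.array(ghgfins_s.loc[sec, yr]["1and2 intensity"].tolist())
        d[str(yr)] = vs[vs < 500]
    int_by_sector[sec] = d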
In [124]:
intensity = OrderedDict()
for yr in range(2009, 2014):
    vs = np.array(ghgfins_yr.loc[yr]["1and2 intensity"].tolist())
    intensity[str(yr)] = vs[vs < 300]
In [141]:
# histograms of intensities over time for each sector
title = "Scope 1 and 2 Intensity Consumer Staples"
fname = "12intyear_energy.html"
hist = Histogram(int_s, bins=20, filename=fname, title=title, legend="top_right",
                 ylabel="Proportion of Companies", xlabel="Intensity in tCO2e/$million")
In [142]:
show(hist)
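The filename kwarg on Histogram is relied on above to write the HTML file. If the installed bokeh.charts version does not honor it, pointing bokeh's file output at fname explicitly before show is an alternative (a sketch, not what was run here):

# Alternative output path: write the chart to fname via bokeh.plotting.
from bokeh.plotting import output_file
output_file(fname, title=title)
show(hist)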
In [ ]: